{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "20f5f070",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os, time, datetime, ast\n",
    "from itertools import product\n",
    "import statsmodels.formula.api as smf\n",
    "from statsmodels.iolib.summary2 import summary_col\n",
    "import re\n",
    "\n",
    "import geopandas as gpd\n",
    "from shapely.geometry import Point, Polygon\n",
    "import shapely.wkt\n",
    "import geopy.distance as geodist\n",
    "\n",
    "import requests\n",
    "import time\n",
    "import json\n",
    "import random\n",
    "\n",
    "# Working directory for every relative path used in this notebook (./Output, ./Temp).\n",
    "# Defaults to the original author's location; set the REPLICATION_BUILD_DIR environment\n",
    "# variable to run the notebook from a different checkout.\n",
    "os.chdir(os.environ.get(\n",
    "    \"REPLICATION_BUILD_DIR\",\n",
    "    \"/Users/xiaosongw/Dropbox/Research/InformedSources/Replication/Build\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2e50ee8f-0e88-482e-b989-780877e32e35",
   "metadata": {},
   "source": [
    "# Melbourne Stations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "333c68d0-ac76-463a-8885-b26b46c115ad",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the station table and build a station id -> coordinate-string lookup.\n",
    "df_st_mel = pd.read_csv(\"./Output/st_mel_full.csv\")\n",
    "# The two comma-separated parts of 'coor' are swapped and the parentheses removed\n",
    "# (presumably '(lat, lon)' on disk -> 'lon,lat' for the routing URLs; TODO confirm).\n",
    "# The parentheses are removed as literal characters (regex=False): a lone '(' or ')'\n",
    "# is not a valid regular expression, and pandas >= 2.0 compiles single-character\n",
    "# patterns when regex=True, which raises re.error.\n",
    "df_st_mel['coor'] = (\n",
    "    df_st_mel['coor']\n",
    "    .str.split(\", \")\n",
    "    .apply(lambda x: '{},{}'.format(x[1], x[0]))\n",
    "    .str.replace(\"(\", \"\", regex=False)\n",
    "    .str.replace(\")\", \"\", regex=False)\n",
    ")\n",
    "d_st2coor = df_st_mel.set_index('id')['coor'].to_dict()\n",
    "print(len(d_st2coor))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ded5adf3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the traffic-zone -> coordinate mapping from its JSON text file.\n",
    "# JSON object keys are always strings, so they are converted to int here.\n",
    "with open('./Output/tzn2coor_mel.txt', 'r') as fh:\n",
    "    raw_tzn2coor = json.load(fh)\n",
    "d_tzn2coor = {int(tzn): coor for tzn, coor in raw_tzn2coor.items()}\n",
    "print(len(d_tzn2coor))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f70d9edc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Station ids and traffic-zone ids, in the insertion order of their lookup dicts.\n",
    "l_st = list(d_st2coor)\n",
    "l_tz = list(d_tzn2coor)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b3627391-7449-4987-9487-5b6b4daeb063",
   "metadata": {},
   "source": [
    "# Station to Traffic Zone"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cb20791d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One row per (station, traffic zone) pair: the full cross product of both id lists.\n",
    "st_tz_pairs = list(product(l_st, l_tz))\n",
    "df_st2tz = pd.DataFrame(st_tz_pairs)\n",
    "df_st2tz.columns = ['id', 'tz']\n",
    "print(df_st2tz.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9415e3b0",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# For every station, ask the public OSRM table service for the driving distance and\n",
    "# duration to each traffic zone, then write that station's results to ./Temp/.\n",
    "CHUNK_SIZE = 100  # destinations sent per request\n",
    "REQUEST_TIMEOUT = 60  # seconds; requests waits indefinitely when no timeout is given\n",
    "start = time.time()\n",
    "l_in = df_st2tz['id'].unique().tolist()\n",
    "ii = 0\n",
    "for i in l_in:\n",
    "    dest_tz = df_st2tz.loc[df_st2tz['id'] == i, 'tz']\n",
    "    l_dest_tzn = dest_tz.tolist()\n",
    "    l_dest_coor = dest_tz.map(d_tzn2coor).tolist()\n",
    "    l_dist_out = []\n",
    "    l_dura_out = []\n",
    "    l_dest_out = []\n",
    "    str_orig = d_st2coor[i]\n",
    "    # Stepping by CHUNK_SIZE never yields an empty slice, so a destination count that\n",
    "    # is an exact multiple of CHUNK_SIZE no longer triggers a request with no destinations.\n",
    "    for j in range(0, len(l_dest_tzn), CHUNK_SIZE):\n",
    "        chunk_ids = l_dest_tzn[j:j + CHUNK_SIZE]\n",
    "        str_dest = ';'.join(l_dest_coor[j:j + CHUNK_SIZE])\n",
    "        url = 'http://router.project-osrm.org/table/v1/driving/{};{}?sources=0&annotations=distance,duration'.format(\n",
    "            str_orig, str_dest)\n",
    "        try:\n",
    "            out = requests.get(url, timeout=REQUEST_TIMEOUT).content\n",
    "            jout = json.loads(out)\n",
    "            # The origin is coordinate 0 of the request, so entry 0 of its row is the\n",
    "            # origin-to-origin value and is dropped.\n",
    "            dist_row = jout['distances'][0][1:]\n",
    "            dura_row = jout['durations'][0][1:]\n",
    "        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as err:\n",
    "            # Best effort: report the failed chunk and move on. KeyboardInterrupt is\n",
    "            # deliberately not caught so the run can still be stopped by hand.\n",
    "            print('error {}: {!r}'.format(i, err))\n",
    "            continue\n",
    "        if not (len(dist_row) == len(dura_row) == len(chunk_ids)):\n",
    "            print('error {}: unexpected response size'.format(i))\n",
    "            continue\n",
    "        # Extend all three lists together, only after the whole chunk parsed, so they\n",
    "        # always have equal length when the DataFrame is built.\n",
    "        l_dist_out.extend(dist_row)\n",
    "        l_dura_out.extend(dura_row)\n",
    "        l_dest_out.extend(chunk_ids)\n",
    "    df_out = pd.DataFrame({'tz': l_dest_out, 'distance': l_dist_out, 'duration': l_dura_out})\n",
    "    df_out['id'] = i\n",
    "    ii += len(df_out)\n",
    "    df_out.to_csv(\"./Temp/st2tz_st{}.csv\".format(i), index=False)\n",
    "    print('st {} done! collected {}. Time {} min'.format(i, ii, round((time.time()-start)/60, 1)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b28f38f3",
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "# Stitch the per-station CSV files in ./Temp/ back into a single frame.\n",
    "fpath = \"./Temp/\"\n",
    "# os.listdir() returns names in arbitrary order; sorting makes the row order reproducible.\n",
    "l_files = sorted(i for i in os.listdir(fpath) if 'st2tz_st' in i)\n",
    "# Read every file first and concatenate once; concatenating inside the loop copies\n",
    "# all previously collected rows again on each iteration.\n",
    "df_st2tz = pd.concat([pd.read_csv(fpath + i) for i in l_files], axis=0)\n",
    "# Divide by 60 (OSRM reports durations in seconds, so this gives minutes).\n",
    "df_st2tz['duration'] = df_st2tz['duration'] / 60\n",
    "print(df_st2tz.shape)\n",
    "display(df_st2tz.head(2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fc3f2496-9962-4f0f-9ac2-2c8bf15ebc49",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Delete each temp file named in l_files from ./Temp/.\n",
    "for fname in l_files:\n",
    "    tmp_file = \"./Temp/\" + fname\n",
    "    os.remove(tmp_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e61a14a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the df_st2tz table to ./Output/ without the index column.\n",
    "df_st2tz.to_csv(path_or_buf=\"./Output/st2tz_out.csv\", index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "947b4e07-c76d-411d-b794-50385afd0684",
   "metadata": {},
   "source": [
    "# Traffic Zone to Station "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa1ad848-e582-4602-afaf-5dde15a9c1bc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One row per (traffic zone, station) pair: the full cross product of both id lists.\n",
    "tz_st_pairs = list(product(l_tz, l_st))\n",
    "df_tz2st = pd.DataFrame(tz_st_pairs)\n",
    "df_tz2st.columns = ['tz', 'id']\n",
    "print(df_tz2st.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "07432a3d-9953-4825-a89c-a6fa5a52d862",
   "metadata": {},
   "outputs": [],
   "source": [
    "# For every traffic zone, ask the public OSRM table service for the driving distance\n",
    "# and duration to each station, then write that zone's results to ./Temp/.\n",
    "CHUNK_SIZE = 100  # destinations sent per request\n",
    "REQUEST_TIMEOUT = 60  # seconds; requests waits indefinitely when no timeout is given\n",
    "start = time.time()\n",
    "l_in = df_tz2st['tz'].unique().tolist()\n",
    "ii = 0\n",
    "for i in l_in:\n",
    "    dest_st = df_tz2st.loc[df_tz2st['tz'] == i, 'id']\n",
    "    l_dest_id = dest_st.tolist()\n",
    "    l_dest_coor = dest_st.map(d_st2coor).tolist()\n",
    "    l_dist_out = []\n",
    "    l_dura_out = []\n",
    "    l_dest_out = []\n",
    "    str_orig = d_tzn2coor[i]\n",
    "    # Stepping by CHUNK_SIZE never yields an empty slice, so a destination count that\n",
    "    # is an exact multiple of CHUNK_SIZE no longer triggers a request with no destinations.\n",
    "    for j in range(0, len(l_dest_id), CHUNK_SIZE):\n",
    "        chunk_ids = l_dest_id[j:j + CHUNK_SIZE]\n",
    "        str_dest = ';'.join(l_dest_coor[j:j + CHUNK_SIZE])\n",
    "        url = 'http://router.project-osrm.org/table/v1/driving/{};{}?sources=0&annotations=distance,duration'.format(\n",
    "            str_orig, str_dest)\n",
    "        try:\n",
    "            out = requests.get(url, timeout=REQUEST_TIMEOUT).content\n",
    "            jout = json.loads(out)\n",
    "            # The origin is coordinate 0 of the request, so entry 0 of its row is the\n",
    "            # origin-to-origin value and is dropped.\n",
    "            dist_row = jout['distances'][0][1:]\n",
    "            dura_row = jout['durations'][0][1:]\n",
    "        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as err:\n",
    "            # Best effort: report the failed chunk and move on. KeyboardInterrupt is\n",
    "            # deliberately not caught so the run can still be stopped by hand.\n",
    "            print('error {}: {!r}'.format(i, err))\n",
    "            continue\n",
    "        if not (len(dist_row) == len(dura_row) == len(chunk_ids)):\n",
    "            print('error {}: unexpected response size'.format(i))\n",
    "            continue\n",
    "        # Extend all three lists together, only after the whole chunk parsed, so they\n",
    "        # always have equal length when the DataFrame is built.\n",
    "        l_dist_out.extend(dist_row)\n",
    "        l_dura_out.extend(dura_row)\n",
    "        l_dest_out.extend(chunk_ids)\n",
    "    df_out = pd.DataFrame({'id': l_dest_out, 'distance': l_dist_out, 'duration': l_dura_out})\n",
    "    df_out['tz'] = i\n",
    "    ii += len(df_out)\n",
    "    df_out.to_csv(\"./Temp/tz2st_tz{}.csv\".format(i), index=False)\n",
    "    print('st {} done! collected {}. Time {} min'.format(i, ii, round((time.time()-start)/60, 1)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f0bee189-0c8d-4131-95d7-a602cfe397c2",
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "# Stitch the per-zone CSV files in ./Temp/ back into a single frame.\n",
    "fpath = \"./Temp/\"\n",
    "# os.listdir() returns names in arbitrary order; sorting makes the row order reproducible.\n",
    "l_files = sorted(i for i in os.listdir(fpath) if 'tz2st_tz' in i)\n",
    "# Read every file first and concatenate once; concatenating inside the loop copies\n",
    "# all previously collected rows again on each iteration.\n",
    "df_tz2st = pd.concat([pd.read_csv(fpath + i) for i in l_files], axis=0)\n",
    "# Divide by 60 (OSRM reports durations in seconds, so this gives minutes).\n",
    "df_tz2st['duration'] = df_tz2st['duration'] / 60\n",
    "print(df_tz2st.shape)\n",
    "display(df_tz2st.head(2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ee42a9b6-06c8-4351-adc5-8e0893dac645",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Delete each temp file named in l_files from ./Temp/.\n",
    "for fname in l_files:\n",
    "    tmp_file = \"./Temp/\" + fname\n",
    "    os.remove(tmp_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1c340a3-57f2-4ed1-a4dc-19fc6e6e00a3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the traffic-zone -> station table. This must write df_tz2st: writing df_st2tz\n",
    "# here would store the station -> traffic-zone results under the tz2st file name.\n",
    "df_tz2st.to_csv(\"./Output/tz2st_out.csv\", index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
